options(scipen = 999, digits = 4)
knitr::opts_chunk$set(comment = "#")
r = getOption("repos")
r["CRAN"] = "http://cran.us.r-project.org"

options(repos = r)

## load required packages
ipak <- function (pkg) {
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg)) 
    install.packages(new.pkg, dependencies = TRUE)
  sapply(pkg, require, character.only = TRUE)
}

packages <- c("tidyverse", "Hmisc", "lattice", "multcomp", "lsmeans", "schoRsch", "influence.ME", "lme4", "effects", "lmerTest", "cowplot", "irr", "simr", "plyr", "dplyr","patchwork", "wesanderson", "MuMIn", "devtools", "dplyr", "ggResidpanel", "HLMdiag", "mixed", "sjPlot")

ipak(packages)
## Warning: package 'mixed' is not available (for R version 4.0.2)
## Warning: replacing previous import 'vctrs::data_frame' by 'tibble::data_frame'
## when loading 'dplyr'
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'mixed'
##    tidyverse        Hmisc      lattice     multcomp      lsmeans     schoRsch 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
## influence.ME         lme4      effects     lmerTest      cowplot          irr 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##         simr         plyr        dplyr    patchwork  wesanderson        MuMIn 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##     devtools        dplyr ggResidpanel      HLMdiag        mixed       sjPlot 
##         TRUE         TRUE         TRUE         TRUE        FALSE         TRUE
# set summed contrasts
options(contrasts = c("contr.sum", "contr.poly")) 

# fix bs with dplyr 
detach("package:dplyr", unload = TRUE)
## Warning: 'dplyr' namespace cannot be unloaded:
##   namespace 'dplyr' is imported by 'tidyr', 'sjPlot', 'plotly', 'broom', 'janitor', 'dbplyr', 'sjmisc', 'sjstats', 'qqplotr', 'HLMdiag' so cannot be unloaded
library("dplyr")
ind.data.pre <- read.csv("./ma_study_data.csv", header=TRUE) %>%
  mutate(experiment=str_sub(MA_condition,1,1)) %>%
  mutate_if(is.character,as.factor) %>%
  mutate(subj = as.factor(subj),
         condition = MA_condition)%>%
  mutate_at(.vars="condition", .funs = tolower)

ma.data <- read.csv("./ma_study_designs_updated.csv") %>%
  rename(paper = study_ID,
         condition = expt_condition,
         experiment = expt_num,
         task = looking_task) %>%
    mutate_if(is.character,as.factor) %>%
  mutate_at(.vars="condition", .funs = tolower)

study.design <- ma.data %>%
  select(paper, experiment, condition, task, training_yesno, action_consequence, actor_hand, agent_efficient_fam, object_diff_size_huge, action_causal, action_consequence, location_object_goal_ambiguous, bothobjects_present_visible_fam, agent, PI_group) %>%
    mutate_if(is.character,as.factor) %>%
  mutate(experiment = as.factor(experiment)) %>%
  filter(paper != "sommerville2005",
         task != "causes")

# merge study design with individual looks from babies

ind.data <- left_join(ind.data.pre, study.design, 
                      by=c("paper", "experiment", "condition")) %>%
  mutate(sex = tolower(str_sub(sex,1,1)),
         look_pref = unexp_look - exp_look) %>%
  mutate(paper = str_replace_all(paper, "unpublished", "unpub")) %>%
  mutate(task = str_replace_all(task, "efficiency", "constraints")) %>%
  mutate_if(is.character,as.factor) 

str(ind.data)
# 'data.frame': 650 obs. of  21 variables:
#  $ subj                           : Factor w/ 650 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
#  $ paper                          : Factor w/ 8 levels "choi_unpub","choi2018",..: 6 6 6 6 6 6 6 6 6 6 ...
#  $ MA_condition                   : Factor w/ 26 levels "1_Active","1_Control",..: 12 12 12 12 12 12 12 12 12 12 ...
#  $ ageday                         : num  103.3 86.9 105.3 129.8 107.3 ...
#  $ exp_look                       : num  13.1 18.4 15.1 7.1 5.8 9 18.9 6 31 42.9 ...
#  $ unexp_look                     : num  60 60 46.3 13.6 12 60 8 21 60 42.7 ...
#  $ sex                            : Factor w/ 2 levels "f","m": 2 2 2 1 1 1 2 2 2 1 ...
#  $ experiment                     : Factor w/ 6 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
#  $ condition                      : Factor w/ 26 levels "1_active","1_control",..: 12 12 12 12 12 12 12 12 12 12 ...
#  $ task                           : Factor w/ 2 levels "constraints",..: 2 2 2 2 2 2 2 2 2 2 ...
#  $ training_yesno                 : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
#  $ action_consequence             : Factor w/ 3 levels "location_change",..: 2 2 2 2 2 2 2 2 2 2 ...
#  $ actor_hand                     : Factor w/ 3 levels "bare","gloved",..: NA NA NA NA NA NA NA NA NA NA ...
#  $ agent_efficient_fam            : Factor w/ 2 levels "no","yes": NA NA NA NA NA NA NA NA NA NA ...
#  $ object_diff_size_huge          : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
#  $ action_causal                  : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
#  $ location_object_goal_ambiguous : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...
#  $ bothobjects_present_visible_fam: Factor w/ 3 levels "","no","yes": 3 3 3 3 3 3 3 3 3 3 ...
#  $ agent                          : Factor w/ 3 levels "hand","object",..: 2 2 2 2 2 2 2 2 2 2 ...
#  $ PI_group                       : Factor w/ 5 levels "desrochers","luo",..: 2 2 2 2 2 2 2 2 2 2 ...
#  $ look_pref                      : num  46.9 41.6 31.2 6.5 6.2 ...
# View(ind.data)
# function that returns column of standardized betas from lmer model
gen.beta <- function(model) {
    df <- data.frame(fixef(model))
    names(df) <- c("beta")
    return(df)
}

# function that computes CIs and returns them in df
gen.ci <- function(model) {
  df <- data.frame(confint(model))
  names(df) <- c("lower", "upper")
  return(df)
}

# function that converts model summary (lmer) to df
gen.m <- function(model) {
  df <- data.frame(coef(summary(model)))
  names(df) <- c("b", "se", "df", "t", "p")
  return(df)
}

# function that converts model summary (lm) to df
gen.lm <- function(model) {
  df <- data.frame(coef(summary(model)))
  names(df) <- c("b", "se", "t", "p")
  return(df)
}

# function that returns age info and number of females in a dataset
ages <- function(longdata) {
  longdata %>% summarize(mean = mean(ageday), min=range(ageday)[1], max=range(ageday)[2], f=sum(sex=="f")/2)
}

# function that returns formatted result from lme4/lmerTest table
report <- function(table, index, places, tails, flip) {
  if (tails == "1") {
    p <- round(table$p[index], places)/2
    howmanytails <- "one-tailed"
  } else {
    p <- round(table$p[index], places)
    howmanytails <- "two-tailed"
  }
  if (p < .001) {
    p <- "<.001"
  } else {
    p <- paste("=", round(p, places), sep = "")
  }
  if (missing(flip)) {
    result <- paste("[", round(table$lower[index], places), ",", round(table$upper[index], places), "], ß=", round(table$beta[index], places), ", B=", round(table$b[index],places), ", SE=", round(table$se[index],places), ", p", p, ", ", howmanytails, sep = "")
  } else {
    result <- paste("[", -round(table$upper[index], places), ",", -round(table$lower[index], places), "], ß=", -round(table$beta[index], places), ", B=", -round(table$b[index],places), ", SE=", round(table$se[index],places), ", p", p, ", ", howmanytails, sep = "")
  }
  return(result)
}

se <- function(x, na.rm = FALSE) {
 if (na.rm) x <- na.omit(x)
 return(sd(x) / sqrt(length(x)))
}

describe <- function(dataset){
  summary <- dataset %>%
  summarise(lookdiff_avg = mean(look_pref, na.rm=TRUE),
            lookdiff_SE = se(look_pref,na.rm=TRUE),
            n = n())
  paste(
        #"M_unexp = ", summary$unexp_avg, "s ",
        #"M_exp = ", summary$exp_avg, "s ",
        "Mean looking preference = ", round(summary$lookdiff_avg,3), "seconds, ",
        "standard error (SE) = ", round(summary$lookdiff_SE,3)
  )
}
## Retrieved from : http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#error-bars-for-within-subjects-variables
## Gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be summariezed
##   groupvars: a vector containing names of columns that contain grouping variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval (default is 95%)
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
  library(plyr)
  
  # New version of length which can handle NA's: if na.rm==T, don't count them
  length2 <- function (x, na.rm=FALSE) {
    if (na.rm) sum(!is.na(x))
    else       length(x)
  }
  
  # This does the summary. For each group's data frame, return a vector with
  # N, mean, and sd
  datac <- ddply(data, groupvars, .drop=.drop,
                 .fun = function(xx, col) {
                   c(N    = length2(xx[[col]], na.rm=na.rm),
                     mean = mean   (xx[[col]], na.rm=na.rm),
                     sd   = sd     (xx[[col]], na.rm=na.rm)
                   )
                 },
                 measurevar
  )
  
  # Rename the "mean" column    
  datac <- plyr::rename(datac, c("mean" = measurevar))
  
  datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean
  
  # Confidence interval multiplier for standard error
  # Calculate t-statistic for confidence interval: 
  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
  ciMult <- qt(conf.interval/2 + .5, datac$N-1)
  datac$ci <- datac$se * ciMult
  
  return(datac)
}
## Norms the data within specified groups in a data frame; it normalizes each
## subject (identified by idvar) so that they have the same mean, within each group
## specified by betweenvars.
##   data: a data frame.
##   idvar: the name of a column that identifies each subject (or matched subjects)
##   measurevar: the name of a column that contains the variable to be summariezed
##   betweenvars: a vector containing names of columns that are between-subjects variables
##   na.rm: a boolean that indicates whether to ignore NA's
normDataWithin <- function(data=NULL, idvar, measurevar, betweenvars=NULL,
                           na.rm=FALSE, .drop=TRUE) {
  library(plyr)
  
  # Measure var on left, idvar + between vars on right of formula.
  data.subjMean <- ddply(data, c(idvar, betweenvars), .drop=.drop,
                         .fun = function(xx, col, na.rm) {
                           c(subjMean = mean(xx[,col], na.rm=na.rm))
                         },
                         measurevar,
                         na.rm
  )
  
  # Put the subject means with original data
  data <- merge(data, data.subjMean)
  
  # Get the normalized data in a new column
  measureNormedVar <- paste(measurevar, "_norm", sep="")
  data[,measureNormedVar] <- data[,measurevar] - data[,"subjMean"] +
    mean(data[,measurevar], na.rm=na.rm)
  
  # Remove this subject mean column
  data$subjMean <- NULL
  
  return(data)
}

## Summarizes data, handling within-subjects variables by removing inter-subject variability.
## It will still work if there are no within-S variables.
## Gives count, un-normed mean, normed mean (with same between-group mean),
##   standard deviation, standard error of the mean, and confidence interval.
## If there are within-subject variables, calculate adjusted values using method from Morey (2008).
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be summariezed
##   betweenvars: a vector containing names of columns that are between-subjects variables
##   withinvars: a vector containing names of columns that are within-subjects variables
##   idvar: the name of a column that identifies each subject (or matched subjects)
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval (default is 95%)
summarySEwithin <- function(data=NULL, measurevar, betweenvars=NULL, withinvars=NULL,
                            idvar=NULL, na.rm=FALSE, conf.interval=.95, .drop=TRUE) {
  
  # Ensure that the betweenvars and withinvars are factors
  factorvars <- vapply(data[, c(betweenvars, withinvars), drop=FALSE],
                       FUN=is.factor, FUN.VALUE=logical(1))
  
  if (!all(factorvars)) {
    nonfactorvars <- names(factorvars)[!factorvars]
    message("Automatically converting the following non-factors to factors: ",
            paste(nonfactorvars, collapse = ", "))
    data[nonfactorvars] <- lapply(data[nonfactorvars], factor)
  }
  
  # Get the means from the un-normed data
  datac <- summarySE(data, measurevar, groupvars=c(betweenvars, withinvars),
                     na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)
  
  # Drop all the unused columns (these will be calculated with normed data)
  datac$sd <- NULL
  datac$se <- NULL
  datac$ci <- NULL
  
  # Norm each subject's data
  ndata <- normDataWithin(data, idvar, measurevar, betweenvars, na.rm, .drop=.drop)
  
  # This is the name of the new column
  measurevar_n <- paste(measurevar, "_norm", sep="")
  
  # Collapse the normed data - now we can treat between and within vars the same
  ndatac <- summarySE(ndata, measurevar_n, groupvars=c(betweenvars, withinvars),
                      na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)
  
  # Apply correction from Morey (2008) to the standard error and confidence interval
  #  Get the product of the number of conditions of within-S variables
  nWithinGroups    <- prod(vapply(ndatac[,withinvars, drop=FALSE], FUN=nlevels,
                                  FUN.VALUE=numeric(1)))
  correctionFactor <- sqrt( nWithinGroups / (nWithinGroups-1) )
  
  # Apply the correction factor
  ndatac$sd <- ndatac$sd * correctionFactor
  ndatac$se <- ndatac$se * correctionFactor
  ndatac$ci <- ndatac$ci * correctionFactor
  
  # Combine the un-normed means with the normed results
  merge(datac, ndatac)
}
ind.data.long <- ind.data %>%
         rename(expected = exp_look,
                unexpected = unexp_look) %>%
         pivot_longer(cols = c(expected, unexpected),
                      names_to = "trial_type",
                      values_to = "looking_time")

ind.data.summary <- summarySEwithin(data = ind.data.long, measurevar = "looking_time", betweenvars = c("task", "paper", "experiment", "condition"), withinvars = "trial_type")
# Automatically converting the following non-factors to factors: trial_type
ind.pref.summary <- summarySE(data = ind.data, measurevar = "look_pref", groupvars = c("task", "paper", "experiment", "condition"))

summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) 
  
n_infants <- ind.data.summary %>%
  filter(trial_type == "expected") %>% # just take one row per study
  summarise(sum(N))

n_conditions <- ind.data.summary %>%
  filter(trial_type == "expected") %>% # just take one row per study
  nrow()

n_papers <- ind.data.summary %>%
  filter(trial_type == "expected") %>% # just take one row per study
  group_by(paper) %>%
  count(paper) %>%
  nrow()

Summary of data sources

We searched for all journal papers, theses, and conference proceedings that reported looking time data from typically developing infants between 2 and 4 months of age in a task structured like the goals or constraints task. We also emailed two listservs (Cognitive Development Society, and Infant Studies) to request more datasets. This resulted in a final list of 11 papers and 35 conditions. We contacted the authors and asked them to send us the original datasets from this past published and unpublished work. We were able to gather original datasets from 8 papers, 30 conditions, and 650 infants (age range 72-137. A team of researchers were then randomly assigned to look through relevant papers including supplemental materials (with SL double-checking every entry and resolving disagreements). When exact values were not provided, or when we came across other ambiguities (e.g. numbers reported in the paper differ from the numbers calculated using the raw data), the team contacted authors to try and address, and also used the tool WebplotDigitizer (https://automeris.io/WebPlotDigitizer/) to extract estimated values from figures. If there were discrepancies between the paper, figures and/or raw data, we prioritized the values from the raw data if available, then author correspondence, then paper, then estimates from figures, in that order. For individual datasets, authors were asked to provide their data and a codebook, and were asked for permission to share their stimuli and data publicly on OSF. Of 8 papers (30, conditions), data from xx conditions and stimuli from xx conditions (either actual study videos, or example stimuli) are publicly available at https://osf.io/zwncg/.

table.goals <- ind.data %>%
  filter(task=="goals") %>%
  group_by(paper, condition, training_yesno, action_causal, action_consequence, location_object_goal_ambiguous, bothobjects_present_visible_fam, agent) %>%
  summarise(n = n(), agemin = floor(range(ageday)[1]), agemax = floor(range(ageday)[2])) %>%
  select(paper, condition, n, agemin, agemax, training_yesno, action_causal, action_consequence, location_object_goal_ambiguous, bothobjects_present_visible_fam, agent)
# `summarise()` regrouping output by 'paper', 'condition', 'training_yesno', 'action_causal', 'action_consequence', 'location_object_goal_ambiguous', 'bothobjects_present_visible_fam' (override with `.groups` argument)
table.constraints <- ind.data %>%
  filter(task=="constraints") %>%
  group_by(paper, condition, training_yesno, action_causal, action_consequence, agent_efficient_fam, actor_hand) %>%
  summarise(n = n(), agemin = floor(range(ageday)[1]), agemax = floor(range(ageday)[2])) %>%
  select(paper, condition, n, agemin, agemax, training_yesno, action_causal, action_consequence, agent_efficient_fam, actor_hand)
# `summarise()` regrouping output by 'paper', 'condition', 'training_yesno', 'action_causal', 'action_consequence', 'agent_efficient_fam' (override with `.groups` argument)
knitr::kable(table.goals)
paper condition n agemin agemax training_yesno action_causal action_consequence location_object_goal_ambiguous bothobjects_present_visible_fam agent
choi_unpub 1_equidistant 24 76 137 no no none yes yes person
choi_unpub 1_fartarget 24 72 130 no no none yes no person
choi_unpub 1_neartarget 24 74 127 no no none yes yes person
choi2018 1_hidden 16 75 121 no no none yes yes person
choi2018 1_oneobject 16 75 127 no no none yes yes person
choi2018 1_twoobject 16 77 130 no no none yes yes person
gerson2014a 1_active 24 91 121 yes no none yes yes hand
gerson2014a 1_control 24 91 120 no no none yes yes hand
gerson2014a 1_observation 24 91 121 no no none yes yes hand
gerson2014b 1_active 30 91 125 yes no none yes yes hand
gerson2014b 1_observation 30 92 125 no no none yes yes hand
gerson2014b 2_generalization 30 94 125 yes no none yes yes hand
luo2011 1_oneobject 12 76 124 no no none yes no object
luo2011 1_twoobject 12 79 129 no no none yes yes object
luo2011 2_differentpositions 12 81 118 no no none no no object
woo_unpub 1_statechange_objectswap 20 93 120 no yes state_change yes yes person
woo_unpub 2_disambiguatingobjectgoal 24 91 121 no yes state_change no yes person
woo_unpub 3_disambiguatinglocationgoal 24 90 121 no yes state_change no yes person
knitr::kable(table.constraints)
paper condition n agemin agemax training_yesno action_causal action_consequence agent_efficient_fam actor_hand
liu2019 1_pickupglove 20 92 122 no yes location_change yes gloved
liu2019 2_pickupbarehand 20 93 120 no yes location_change yes bare
liu2019 3_statechangebarrier 20 91 122 no yes state_change yes gloved
liu2019 3_statechangenobarrier 20 91 122 no yes state_change no gloved
liu2019 4_statechangenotcausal 20 93 121 no no state_change yes gloved
liu2019 5_statechangecausal 26 92 121 no yes state_change yes gloved
liu2019 5_statechangenotcausal 26 93 120 no no state_change yes gloved
skerry2013 1_effectiveactiontraining 20 93 121 yes yes location_change yes mittened
skerry2013 2_ineffectiveactiontraining 20 93 122 no yes location_change yes mittened
skerry2013 3_notraining 20 91 121 no yes location_change yes mittened
skerry2013 4_constrainedactionhabituation 26 93 120 yes yes location_change yes mittened
skerry2013 5_unconstrainedactionhabituation 26 93 122 yes yes location_change no mittened

Data appear to be normally distributed, no need to log transform.

str(ind.data)
# 'data.frame': 650 obs. of  21 variables:
#  $ subj                           : Factor w/ 650 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
#  $ paper                          : Factor w/ 8 levels "choi_unpub","choi2018",..: 6 6 6 6 6 6 6 6 6 6 ...
#  $ MA_condition                   : Factor w/ 26 levels "1_Active","1_Control",..: 12 12 12 12 12 12 12 12 12 12 ...
#  $ ageday                         : num  103.3 86.9 105.3 129.8 107.3 ...
#  $ exp_look                       : num  13.1 18.4 15.1 7.1 5.8 9 18.9 6 31 42.9 ...
#  $ unexp_look                     : num  60 60 46.3 13.6 12 60 8 21 60 42.7 ...
#  $ sex                            : Factor w/ 2 levels "f","m": 2 2 2 1 1 1 2 2 2 1 ...
#  $ experiment                     : Factor w/ 6 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
#  $ condition                      : Factor w/ 26 levels "1_active","1_control",..: 12 12 12 12 12 12 12 12 12 12 ...
#  $ task                           : Factor w/ 2 levels "constraints",..: 2 2 2 2 2 2 2 2 2 2 ...
#  $ training_yesno                 : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
#  $ action_consequence             : Factor w/ 3 levels "location_change",..: 2 2 2 2 2 2 2 2 2 2 ...
#  $ actor_hand                     : Factor w/ 3 levels "bare","gloved",..: NA NA NA NA NA NA NA NA NA NA ...
#  $ agent_efficient_fam            : Factor w/ 2 levels "no","yes": NA NA NA NA NA NA NA NA NA NA ...
#  $ object_diff_size_huge          : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
#  $ action_causal                  : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
#  $ location_object_goal_ambiguous : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...
#  $ bothobjects_present_visible_fam: Factor w/ 3 levels "","no","yes": 3 3 3 3 3 3 3 3 3 3 ...
#  $ agent                          : Factor w/ 3 levels "hand","object",..: 2 2 2 2 2 2 2 2 2 2 ...
#  $ PI_group                       : Factor w/ 5 levels "desrochers","luo",..: 2 2 2 2 2 2 2 2 2 2 ...
#  $ look_pref                      : num  46.9 41.6 31.2 6.5 6.2 ...
# data appear to be normally distributed
dist1 <- ggplot(ind.data, aes(x=look_pref)) +
  geom_histogram()+
  theme_cowplot(12)

dist2 <- ggplot(ind.data, aes(x=look_pref)) +
  geom_histogram()+
  facet_wrap(~paper,nrow=3)+
  theme_cowplot(12)

(dist1 | dist2)  + plot_layout(widths = c(1,4)) + plot_annotation(tag_levels = 'A')
# `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Plots

Looking to the expected vs unexpected event for all individual conditions.

plot1 <- ggplot(ind.data.long,
       aes(trial_type, looking_time, colour=paper))+
  theme_cowplot(12)+
  theme(legend.title = element_blank())+
  geom_boxplot(outlier.alpha = 0.2)+
  geom_point(alpha=0.2)+
  geom_line(aes(group=subj), alpha=0.2)+
  stat_summary(geom="point", fun="mean", colour="black")+
  # geom_errorbar(data = ind.data.summary, colour="red", position = position_dodge(width = 5), width = 0, aes(ymin=looking_time-se, ymax=looking_time+se)) +
  stat_summary(geom="errorbar", fun.data="mean_se", width=0.2, colour="black")+
  ylab("Looking Time (s)")+
  xlab("Condition")+
  facet_wrap(~task+paper+condition, nrow=4, labeller=label_wrap_gen())+
  theme(axis.text.x = element_text(angle = 90, hjust=0.95, vjust =0.2),
        legend.position="bottom")
  
plot1

Looking preference (unexpected - expected) for all individual conditions.

plot2.goals <- ggplot(ind.data %>% filter(task == "goals"),
                 aes(condition, look_pref, fill=paper)) +
  geom_boxplot(outlier.alpha = 0.2, position = position_dodge2(preserve = "single"))+
  geom_hline(yintercept = 0)+
  geom_point(alpha=0.2)+
  stat_summary(geom="point", fun="mean", colour="black")+
  stat_summary(geom="errorbar", fun.data="mean_se", width=0.2, colour="black")+
  theme_cowplot(12)+
  facet_grid(~paper, scales = "free_x",space = "free", labeller = label_wrap_gen())+
  ylab("Looking preference (s) \n <- Expected ------- Unexpected ->")+
  coord_cartesian(ylim=c(-40,40))+
  theme(axis.text.x = element_text(angle = 90, hjust=0.95, vjust =0.2), legend.position = "none")+
  scale_fill_brewer(palette = "Dark2")

plot2.constraints <- ggplot(ind.data %>% filter(task == "constraints"),
                 aes(condition, look_pref, fill=paper)) +
  geom_boxplot(outlier.alpha = 0.2, position = position_dodge2(preserve = "single"))+
  geom_hline(yintercept = 0)+
  geom_point(alpha=0.2)+
  stat_summary(geom="point", fun="mean", colour="black")+
  stat_summary(geom="errorbar", fun.data="mean_se", width=0.2, colour="black")+
  theme_cowplot(12)+
  facet_grid(~paper,scales = "free_x",space = "free", labeller = label_wrap_gen())+
  ylab("")+
  xlab("")+
  coord_cartesian(ylim=c(-40,40))+
  theme(axis.text.x = element_text(angle = 90, hjust=0.95, vjust =0.2), legend.position = "none")+
  scale_fill_manual(values = wes_palette("Royal2"))


(plot2.goals | plot2.constraints) + plot_layout(widths = c(2, 1)) + plot_annotation(tag_levels = 'A')

Looking preference (unexpected - expected) vs age for all individual conditions.

(plot3 <- ggplot(ind.data,
                 aes(ageday, look_pref, colour=paper)) +
  # geom_boxplot(outlier.alpha = 0.2, position = position_dodge2(preserve = "single"))+
  geom_hline(yintercept = 0)+
  geom_point()+
  geom_smooth(method="lm")+
  # stat_summary(geom="point", fun="mean", colour="black")+
  # stat_summary(geom="errorbar", fun.data="mean_se", width=0.2, colour="black")+
  theme_cowplot(12)+
  facet_grid(~task+paper)+
  ylab("Looking preference (s) \n <- Expected ------- Unexpected ->")+
  theme(axis.text.x = element_text(angle = 90, hjust=0.95, vjust =0.2)))
# `geom_smooth()` using formula 'y ~ x'

Confirmatory Analysis

Constraints Task

constraints <- ind.data %>% filter(task =="constraints")

# checking ref levels for all factors
constraints$actor_hand
#   [1] mittened mittened mittened mittened mittened mittened mittened mittened
#   [9] mittened mittened mittened mittened mittened mittened mittened mittened
#  [17] mittened mittened mittened mittened mittened mittened mittened mittened
#  [25] mittened mittened mittened mittened mittened mittened mittened mittened
#  [33] mittened mittened mittened mittened mittened mittened mittened mittened
#  [41] mittened mittened mittened mittened mittened mittened mittened mittened
#  [49] mittened mittened mittened mittened mittened mittened mittened mittened
#  [57] mittened mittened mittened mittened mittened mittened mittened mittened
#  [65] mittened mittened mittened mittened mittened mittened mittened mittened
#  [73] mittened mittened mittened mittened mittened mittened mittened mittened
#  [81] mittened mittened mittened mittened mittened mittened mittened mittened
#  [89] mittened mittened mittened mittened mittened mittened mittened mittened
#  [97] mittened mittened mittened mittened mittened mittened mittened mittened
# [105] mittened mittened mittened mittened mittened mittened mittened mittened
# [113] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [121] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [129] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [137] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [145] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [153] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [161] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [169] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [177] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [185] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [193] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [201] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [209] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [217] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [225] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [233] gloved   gloved   gloved   gloved   gloved   gloved   gloved   gloved  
# [241] gloved   gloved   gloved   gloved   bare     bare     bare     bare    
# [249] bare     bare     bare     bare     bare     bare     bare     bare    
# [257] bare     bare     bare     bare     bare     bare     bare     bare    
# Levels: bare gloved mittened
constraints$action_consequence <- relevel(constraints$action_consequence, ref = "state_change")
constraints$agent_efficient_fam <- relevel(constraints$agent_efficient_fam, ref = "yes")
constraints$training_yesno <- relevel(constraints$training_yesno, ref = "yes")
constraints$action_causal <- relevel(constraints$action_causal, ref = "yes")

Baseline Effect

constraints.baseline.data <- constraints %>%
  filter(condition %in% c("3_notraining",
                          "1_pickupglove",
                          "2_pickupbarehand"))


constraints.b1 <- lmer(data = constraints.baseline.data,
                       formula = look_pref ~ 1 + ageday + (1|condition))
summary(constraints.b1)
# Linear mixed model fit by REML. t-tests use Satterthwaite's method [
# lmerModLmerTest]
# Formula: look_pref ~ 1 + ageday + (1 | condition)
#    Data: constraints.baseline.data
# 
# REML criterion at convergence: 366.4
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.7189 -0.5297 -0.0435  0.7252  2.2686 
# 
# Random effects:
#  Groups    Name        Variance Std.Dev.
#  condition (Intercept)  2.03    1.42    
#  Residual              25.37    5.04    
# Number of obs: 60, groups:  condition, 3
# 
# Fixed effects:
#             Estimate Std. Error      df t value Pr(>|t|)
# (Intercept)  -2.3930     9.0407 57.2416   -0.26     0.79
# ageday        0.0258     0.0832 56.0050    0.31     0.76
# 
# Correlation of Fixed Effects:
#        (Intr)
# ageday -0.993
resid_panel(constraints.b1, plots = "all",
                          smoother = TRUE,
                          qqbands = TRUE)
# `geom_smooth()` using formula 'y ~ x'
# `geom_smooth()` using formula 'y ~ x'

constraints.b1.table <- sjPlot::tab_model(constraints.b1,
                                           show.std =TRUE,
                                           show.stat=TRUE,
                                           show.df=TRUE)

constraints.b1.beta <- summary(effectsize::standardize(constraints.b1))

constraints.baseline <- cbind(
  gen.beta(effectsize::standardize(constraints.b1)),
  gen.m(constraints.b1),
  gen.ci(constraints.b1)[3:4,]
) 
# Computing profile confidence intervals ...
constraints.baseline
#                                beta        b      se    df       t      p
# (Intercept) -0.00000000000000002645 -2.39296 9.04071 57.24 -0.2647 0.7922
# ageday       0.03966247939082377660  0.02582 0.08316 56.01  0.3105 0.7574
#                lower   upper
# (Intercept) -20.1694 15.4724
# ageday       -0.1389  0.1897
cooks1 <- cooks.distance(constraints.b1, group = "subj")
# Warning in cooks.distance.lmerMod(constraints.b1, group = "subj"): group is not
# a valid argument for this function. As of version 0.4.0, group has been replaced
# by level.
dotplot_diag(x = cooks1, cutoff = "internal", name = "cooks.distance",  index=constraints.baseline.data$subj) + ylab("Cook's distance") + xlab("subjID")

excluded.subs <- c("90", "553", "86") 

constraints.b1.cooks <- lmer(data = constraints.baseline.data %>%
                         filter(!subj %in% excluded.subs),
                       formula = look_pref ~ 1 + ageday + (1|condition))


summary(constraints.b1.cooks)
# Linear mixed model fit by REML. t-tests use Satterthwaite's method [
# lmerModLmerTest]
# Formula: look_pref ~ 1 + ageday + (1 | condition)
#    Data: constraints.baseline.data %>% filter(!subj %in% excluded.subs)
# 
# REML criterion at convergence: 338.6
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.5544 -0.5380 -0.0717  0.7259  2.4971 
# 
# Random effects:
#  Groups    Name        Variance Std.Dev.
#  condition (Intercept)  1.14    1.07    
#  Residual              21.61    4.65    
# Number of obs: 57, groups:  condition, 3
# 
# Fixed effects:
#             Estimate Std. Error      df t value Pr(>|t|)
# (Intercept)  -9.5734     9.0731 54.2948   -1.06     0.30
# ageday        0.0926     0.0838 53.5855    1.11     0.27
# 
# Correlation of Fixed Effects:
#        (Intr)
# ageday -0.995
plot(allEffects(constraints.b1.cooks))

constraints.baseline.cooks <- cbind(
  gen.beta(effectsize::standardize(constraints.b1.cooks)),
  gen.m(constraints.b1.cooks),
  gen.ci(constraints.b1.cooks)[3:4,]
) 
# Computing profile confidence intervals ...

For standard versions of the constraints task, we found no differential looking between the expected and unexpected events, Mean looking preference = 0.395 seconds, standard error (SE) = 0.663, [-0.139,0.19], ß=0.04, B=0.026, SE=0.083, p=0.757, two-tailed. This result held when we removed 3 influential observations, identified using Cook’s distance, [-0.07,0.263], ß=0.145, B=0.093, SE=0.084, p=0.274, two-tailed.

Intervention Analysis

constraints.m1 <- lmer(data = constraints,
     formula = look_pref ~ training_yesno + action_causal + action_consequence + actor_hand + agent_efficient_fam + ageday + (1|condition) + (1|experiment) + (1|paper))
# boundary (singular) fit: see ?isSingular
# Warning in as_lmerModLT(model, devfun): Model may not have converged with 1
# eigenvalue close to zero: -3.1e-11
summary(constraints.m1)
# Linear mixed model fit by REML. t-tests use Satterthwaite's method [
# lmerModLmerTest]
# Formula: look_pref ~ training_yesno + action_causal + action_consequence +  
#     actor_hand + agent_efficient_fam + ageday + (1 | condition) +  
#     (1 | experiment) + (1 | paper)
#    Data: constraints
# 
# REML criterion at convergence: 1682
# 
# Scaled residuals: 
#    Min     1Q Median     3Q    Max 
# -3.729 -0.510 -0.059  0.561  3.866 
# 
# Random effects:
#  Groups     Name        Variance Std.Dev.
#  condition  (Intercept)  0.000   0.000   
#  experiment (Intercept)  0.000   0.000   
#  paper      (Intercept)  0.121   0.347   
#  Residual               35.324   5.943   
# Number of obs: 264, groups:  condition, 12; experiment, 5; paper, 2
# 
# Fixed effects:
#                      Estimate Std. Error       df t value Pr(>|t|)    
# (Intercept)           -2.8929     4.6171 256.0000   -0.63  0.53151    
# training_yesno1        2.1859     0.6174 256.0000    3.54  0.00047 ***
# action_causal1         2.3185     0.5944 256.0000    3.90  0.00012 ***
# action_consequence1    1.0693     0.7763 256.0000    1.38  0.16959    
# actor_hand1            1.3018     1.0518 256.0000    1.24  0.21698    
# actor_hand2            0.8084     1.0518 256.0000    0.77  0.44282    
# agent_efficient_fam1   1.7116     0.5377 256.0000    3.18  0.00164 ** 
# ageday                 0.0231     0.0419 256.0000    0.55  0.58139    
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Correlation of Fixed Effects:
#             (Intr) trnn_1 actn_cs1 actn_cn1 actr_1 actr_2 agn__1
# tranng_ysn1  0.062                                              
# action_csl1 -0.143  0.085                                       
# actn_cnsqn1 -0.010  0.065  0.349                                
# actor_hand1  0.124  0.227 -0.001    0.360                       
# actor_hand2 -0.060  0.227  0.000   -0.721   -0.597              
# agnt_ffcn_1 -0.092  0.314  0.275    0.210    0.000  0.000       
# ageday      -0.982 -0.017  0.053    0.036   -0.010 -0.008  0.018
# convergence code: 0
# boundary (singular) fit: see ?isSingular
resid_panel(constraints.m1, plots = "all",
                          smoother = TRUE,
                          qqbands = TRUE)
# `geom_smooth()` using formula 'y ~ x'
# `geom_smooth()` using formula 'y ~ x'

cooks1 <- cooks.distance(constraints.m1, group = "subj")
# Warning in cooks.distance.lmerMod(constraints.m1, group = "subj"): group is not
# a valid argument for this function. As of version 0.4.0, group has been replaced
# by level.
dotplot_diag(x = cooks1, cutoff = "internal", name = "cooks.distance", index=constraints$subj) + ylab("Cook's distance") + xlab("ID")

sjPlot::plot_model(constraints.m1,
                   type = "est",
                   colors = "bw",
                   sort.est=TRUE,
                   axis.title=c("Effect (Unexpected - Expected in seconds)"),
                   show.values=TRUE,
                   show.p=TRUE)

constraints.m1.table <- sjPlot:: tab_model(constraints.m1,
                                           show.std =TRUE,
                                           show.stat=TRUE,
                                           show.df=TRUE)
# boundary (singular) fit: see ?isSingular
constraints.m1.beta <- summary(effectsize::standardize(constraints.m1))
# boundary (singular) fit: see ?isSingular
constraints.interventions <- cbind(
  gen.beta(effectsize::standardize(constraints.m1)),
  gen.m(constraints.m1),
  gen.ci(constraints.m1)[3:10,]
) 
# boundary (singular) fit: see ?isSingular
# Computing profile confidence intervals ...
constraints.cooks<- constraints[which(cooks1 <= 4/264),]

# where are the influential observations?
nrow(constraints)
# [1] 264
nrow(constraints.cooks)
# [1] 249
constraints.summary <- constraints %>%
  group_by(paper,condition) %>%
  summarise(n = n())
# `summarise()` regrouping output by 'paper' (override with `.groups` argument)
constraints.cooksn <- constraints.cooks %>%
  group_by(paper,condition) %>%
  summarise(n_cooks = n()) 
# `summarise()` regrouping output by 'paper' (override with `.groups` argument)
constraints.where.influential <- full_join(constraints.summary,constraints.cooksn) %>%
  mutate(n_excluded = n - n_cooks)
# Joining, by = c("paper", "condition")
knitr::kable(constraints.where.influential)
paper condition n n_cooks n_excluded
liu2019 1_pickupglove 20 18 2
liu2019 2_pickupbarehand 20 19 1
liu2019 3_statechangebarrier 20 19 1
liu2019 3_statechangenobarrier 20 17 3
liu2019 4_statechangenotcausal 20 17 3
liu2019 5_statechangecausal 26 26 0
liu2019 5_statechangenotcausal 26 23 3
skerry2013 1_effectiveactiontraining 20 20 0
skerry2013 2_ineffectiveactiontraining 20 20 0
skerry2013 3_notraining 20 19 1
skerry2013 4_constrainedactionhabituation 26 26 0
skerry2013 5_unconstrainedactionhabituation 26 25 1
constraints.m1.cooks <- lmer(data = constraints.cooks,
     formula = look_pref ~ training_yesno + action_causal + action_consequence + actor_hand + agent_efficient_fam + ageday + (1|condition) + (1|experiment) + (1|paper),
     REML=FALSE)
# boundary (singular) fit: see ?isSingular
summary(constraints.m1.cooks)
# Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
#   method [lmerModLmerTest]
# Formula: look_pref ~ training_yesno + action_causal + action_consequence +  
#     actor_hand + agent_efficient_fam + ageday + (1 | condition) +  
#     (1 | experiment) + (1 | paper)
#    Data: constraints.cooks
# 
#      AIC      BIC   logLik deviance df.resid 
#   1484.8   1527.0   -730.4   1460.8      237 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.9748 -0.6479 -0.0429  0.6569  3.1180 
# 
# Random effects:
#  Groups     Name        Variance Std.Dev.
#  condition  (Intercept)  0.0     0.00    
#  experiment (Intercept)  0.0     0.00    
#  paper      (Intercept)  0.0     0.00    
#  Residual               20.7     4.55    
# Number of obs: 249, groups:  condition, 12; experiment, 5; paper, 2
# 
# Fixed effects:
#                      Estimate Std. Error       df t value   Pr(>|t|)    
# (Intercept)           -4.0078     3.6877 249.0000   -1.09      0.278    
# training_yesno1        1.9535     0.4773 249.0000    4.09 0.00005773 ***
# action_causal1         2.5634     0.4768 249.0000    5.38 0.00000017 ***
# action_consequence1    1.2255     0.6202 249.0000    1.98      0.049 *  
# actor_hand1            0.7581     0.8187 249.0000    0.93      0.355    
# actor_hand2            0.9867     0.8311 249.0000    1.19      0.236    
# agent_efficient_fam1   1.9383     0.4261 249.0000    4.55 0.00000844 ***
# ageday                 0.0281     0.0333 249.0000    0.84      0.400    
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Correlation of Fixed Effects:
#             (Intr) trnn_1 actn_cs1 actn_cn1 actr_1 actr_2 agn__1
# tranng_ysn1  0.061                                              
# action_csl1 -0.161  0.076                                       
# actn_cnsqn1 -0.007  0.058  0.330                                
# actor_hand1  0.120  0.226 -0.001    0.377                       
# actor_hand2 -0.047  0.223 -0.002   -0.744   -0.644              
# agnt_ffcn_1 -0.124  0.313  0.248    0.190    0.000 -0.001       
# ageday      -0.983 -0.018  0.070    0.035   -0.009 -0.025  0.050
# convergence code: 0
# boundary (singular) fit: see ?isSingular
constraints.m1.cooks.beta <- lmer(data = constraints.cooks,
     formula = scale(look_pref) ~ training_yesno + action_causal + action_consequence + actor_hand + agent_efficient_fam + scale(ageday) + (1|condition) + (1|experiment) + (1|paper),
     REML=FALSE)
# boundary (singular) fit: see ?isSingular
summary(constraints.m1.cooks.beta)
# Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
#   method [lmerModLmerTest]
# Formula: 
# scale(look_pref) ~ training_yesno + action_causal + action_consequence +  
#     actor_hand + agent_efficient_fam + scale(ageday) + (1 | condition) +  
#     (1 | experiment) + (1 | paper)
#    Data: constraints.cooks
# 
#      AIC      BIC   logLik deviance df.resid 
#    683.5    725.7   -329.8    659.5      237 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.9748 -0.6479 -0.0429  0.6569  3.1180 
# 
# Random effects:
#  Groups     Name        Variance Std.Dev.
#  condition  (Intercept) 0.000    0.00    
#  experiment (Intercept) 0.000    0.00    
#  paper      (Intercept) 0.000    0.00    
#  Residual               0.828    0.91    
# Number of obs: 249, groups:  condition, 12; experiment, 5; paper, 2
# 
# Fixed effects:
#                      Estimate Std. Error       df t value   Pr(>|t|)    
# (Intercept)           -0.3453     0.1338 249.0000   -2.58      0.010 *  
# training_yesno1        0.3908     0.0955 249.0000    4.09 0.00005773 ***
# action_causal1         0.5129     0.0954 249.0000    5.38 0.00000017 ***
# action_consequence1    0.2452     0.1241 249.0000    1.98      0.049 *  
# actor_hand1            0.1517     0.1638 249.0000    0.93      0.355    
# actor_hand2            0.1974     0.1663 249.0000    1.19      0.236    
# agent_efficient_fam1   0.3878     0.0853 249.0000    4.55 0.00000844 ***
# scale(ageday)          0.0490     0.0580 249.0000    0.84      0.400    
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Correlation of Fixed Effects:
#             (Intr) trnn_1 actn_cs1 actn_cn1 actr_1 actr_2 agn__1
# tranng_ysn1  0.238                                              
# action_csl1 -0.510  0.076                                       
# actn_cnsqn1  0.151  0.058  0.330                                
# actor_hand1  0.613  0.226 -0.001    0.377                       
# actor_hand2 -0.393  0.223 -0.002   -0.744   -0.644              
# agnt_ffcn_1 -0.415  0.313  0.248    0.190    0.000 -0.001       
# scale(agdy) -0.058 -0.018  0.070    0.035   -0.009 -0.025  0.050
# convergence code: 0
# boundary (singular) fit: see ?isSingular
sjPlot::plot_model(constraints.m1.cooks.beta,
                   type = "pred",
                   # colors = "bw",
                   sort.est=TRUE,
                   axis.title=c("Effect (Unexpected - Expected in seconds)"),
                   show.values=TRUE,
                   show.p=TRUE)
# $training_yesno

# 
# $action_causal

# 
# $action_consequence

# 
# $actor_hand

# 
# $agent_efficient_fam

# 
# $ageday

plot(allEffects(constraints.m1.cooks.beta))

#TODO figure out which levels of hand is which
sjPlot::plot_model(constraints.m1.cooks.beta,
                   type = "est",
                   colors = "bw",
                   sort.est=TRUE,
                   title = "",
                   # axis.labels= c("Age in days",
                   #                "Hand (gloved)",
                   #                "Hand (bare)",
                   #                "State change (vs location change)",
                   #                "Actor efficient during habituation",
                   #                "Sticky mittens training",
                   #                "Action causes change"),
                   axis.title=c("Effect on looking (unexpected - expected) in standard deviations"),
                   show.values=TRUE,
                   show.p=TRUE)

constraints.interventions.cooks <- cbind(
  gen.beta(effectsize::standardize(constraints.m1.cooks)),
  gen.m(constraints.m1.cooks),
  gen.ci(constraints.m1.cooks)[3:10,]
) 
# boundary (singular) fit: see ?isSingular
# Computing profile confidence intervals ...

When we compared the effects of different variants we found that motor training increased infants’ looking preference for the unexpected event ([5.387,6.39], ß=0.354, B=2.186, SE=0.617, p<.001, two-tailed). We also found that other interventions on the actions infants saw, including seeing an action that causes an effect on contact ([-11.821,6.036], ß=0.375, B=2.318, SE=0.594, p<.001, two-tailed). We did not find an effect of age, [-1.205,2.821], ß=0.033, B=0.023, SE=0.042, p=0.581, two-tailed. This analysis included 264 total infants, 15 of were classified as influential by Cook’s Distance. When these influential participants were excluded from the analysis, we found qualitatively equivalent results, except that the effect of seeing an action that resulted in a state change (versus location change) crossed the threshold into significance ([1.014,2.892], ß=0.245, B=1.226, SE=0.62, p=0.049, two-tailed).

Goals Task

Baseline Effect

goals <- ind.data %>% filter(task =="goals")

goals$action_consequence <- relevel(goals$action_consequence, ref = "none")
goals$agent <- relevel(goals$agent, ref = "object")
goals$training_yesno <- relevel(goals$training_yesno, ref = "yes")
goals$bothobjects_present_visible_fam <- relevel(goals$bothobjects_present_visible_fam, ref = "yes")
goals.baseline.data <- goals %>%
  filter(condition %in% c("1_control",
                          "1_twoobject") &
           paper %in% c("gerson2014a",
                        "luo2011"))
  
goals.b1 <- lmer(data = goals.baseline.data,
                       formula = look_pref ~ 1 + ageday + (1|condition))

goals.b1.table <- sjPlot:: tab_model(goals.b1,
                                           show.std =TRUE,
                                           show.stat=TRUE,
                                           show.df=TRUE)

goals.b1.beta <- summary(effectsize::standardize(goals.b1))

goals.baseline <- cbind(
  gen.beta(effectsize::standardize(goals.b1)),
  gen.m(goals.b1),
  gen.ci(goals.b1)[3:4,]
) 
# Computing profile confidence intervals ...
cooks1 <- cooks.distance(goals.b1, group = "subj")
# Warning in cooks.distance.lmerMod(goals.b1, group = "subj"): group is not a
# valid argument for this function. As of version 0.4.0, group has been replaced
# by level.
dotplot_diag(x = cooks1, cutoff = "internal", name = "cooks.distance",  index=goals.baseline.data$subj) + ylab("Cook's distance") + xlab("subjID")

excluded.subs <- c("2", "7", "6", "11") 

goals.b1.cooks <- lmer(data = goals.baseline.data %>%
                         filter(!subj %in% excluded.subs),
                       formula = look_pref ~ 1 + ageday + (1|condition))

summary(goals.b1.cooks)
# Linear mixed model fit by REML. t-tests use Satterthwaite's method [
# lmerModLmerTest]
# Formula: look_pref ~ 1 + ageday + (1 | condition)
#    Data: goals.baseline.data %>% filter(!subj %in% excluded.subs)
# 
# REML criterion at convergence: 239.2
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -1.6626 -0.5931  0.0995  0.4841  2.8202 
# 
# Random effects:
#  Groups    Name        Variance Std.Dev.
#  condition (Intercept) 202      14.2    
#  Residual              106      10.3    
# Number of obs: 32, groups:  condition, 2
# 
# Fixed effects:
#             Estimate Std. Error     df t value Pr(>|t|)
# (Intercept)   44.720     27.063 21.547    1.65     0.11
# ageday        -0.355      0.236 29.073   -1.51     0.14
# 
# Correlation of Fixed Effects:
#        (Intr)
# ageday -0.925
goals.baseline.cooks <- cbind(
  gen.beta(effectsize::standardize(goals.b1.cooks)),
  gen.m(goals.b1.cooks),
  gen.ci(goals.b1.cooks)[3:4,]
) 
# Computing profile confidence intervals ...
# Warning in nextpar(mat, cc, i, delta, lowcut, upcut): unexpected decrease in
# profile: using minstep
# Warning in profile.merMod(object, which = parm, signames = oldNames, ...): non-
# monotonic profile for (Intercept)
# Warning in confint.thpr(pp, level = level, zeta = zeta): bad spline fit for
# (Intercept): falling back to linear interpolation

For standard versions of the goals task, we found differential looking between the expected and unexpected events, Mean looking preference = 3.918 seconds, standard error (SE) = 2.807, [-0.931,-0.081], ß=-0.318, B=-0.503, SE=0.214, p=0.025, two-tailed. However, this was driven by 4 influential observations - without these 4 observations, there was no longer a reliable looking preference, [-0.815,0.126], ß=-0.21, B=-0.355, SE=0.236, p=0.143, two-tailed.

Intervention Analysis

goals.m1 <- lmer(data = goals,
     formula = look_pref ~ training_yesno  + action_consequence + location_object_goal_ambiguous + agent + bothobjects_present_visible_fam + ageday + (1|condition) + (1|experiment) + (1|paper),
     control = lmerControl(optimizer="Nelder_Mead"))
# boundary (singular) fit: see ?isSingular
summary(goals.m1)
# Linear mixed model fit by REML. t-tests use Satterthwaite's method [
# lmerModLmerTest]
# Formula: 
# look_pref ~ training_yesno + action_consequence + location_object_goal_ambiguous +  
#     agent + bothobjects_present_visible_fam + ageday + (1 | condition) +  
#     (1 | experiment) + (1 | paper)
#    Data: goals
# Control: lmerControl(optimizer = "Nelder_Mead")
# 
# REML criterion at convergence: 3022
# 
# Scaled residuals: 
#    Min     1Q Median     3Q    Max 
# -4.137 -0.456 -0.040  0.412  2.956 
# 
# Random effects:
#  Groups     Name        Variance      Std.Dev. 
#  condition  (Intercept)  16.053298091  4.006657
#  paper      (Intercept)   0.000000112  0.000335
#  experiment (Intercept)   0.000000778  0.000882
#  Residual               149.271235370 12.217661
# Number of obs: 386, groups:  condition, 14; paper, 6; experiment, 3
# 
# Fixed effects:
#                                  Estimate Std. Error       df t value Pr(>|t|)
# (Intercept)                        9.5238     5.8857 143.0053    1.62    0.108
# training_yesno1                    1.9406     2.2456   5.3129    0.86    0.425
# action_consequence1                2.9489     2.2291   9.9009    1.32    0.216
# location_object_goal_ambiguous1    4.7807     2.1749   9.5526    2.20    0.054
# agent1                             6.4554     2.6278  21.1836    2.46    0.023
# agent2                            -5.4021     2.7773   7.0042   -1.95    0.093
# bothobjects_present_visible_fam1   5.0575     1.9463  15.5349    2.60    0.020
# ageday                            -0.0677     0.0507 376.2822   -1.34    0.182
#                                   
# (Intercept)                       
# training_yesno1                   
# action_consequence1               
# location_object_goal_ambiguous1  .
# agent1                           *
# agent2                           .
# bothobjects_present_visible_fam1 *
# ageday                            
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Correlation of Fixed Effects:
#             (Intr) trnn_1 actn_1 lc___1 agent1 agent2 bt___1
# tranng_ysn1  0.292                                          
# actn_cnsqn1 -0.216 -0.003                                   
# lctn_bjc__1 -0.001 -0.001  0.630                            
# agent1      -0.001  0.287 -0.005 -0.167                     
# agent2       0.009 -0.543 -0.161  0.105 -0.791              
# bthbjct___1 -0.131  0.003  0.270  0.160  0.563 -0.397       
# ageday      -0.898 -0.045  0.061  0.031  0.019 -0.045 -0.059
# convergence code: 0
# boundary (singular) fit: see ?isSingular
goals %>%
  select(paper, experiment, condition, training_yesno, object_diff_size_huge,action_consequence,location_object_goal_ambiguous,agent,bothobjects_present_visible_fam) %>%
  distinct() 
#          paper experiment                    condition training_yesno
# 1      luo2011          1                  1_twoobject             no
# 2      luo2011          1                  1_oneobject             no
# 3      luo2011          2         2_differentpositions             no
# 4  gerson2014a          1                     1_active            yes
# 5  gerson2014a          1                1_observation             no
# 6  gerson2014a          1                    1_control             no
# 7  gerson2014b          1                     1_active            yes
# 8  gerson2014b          1                1_observation             no
# 9  gerson2014b          2             2_generalization            yes
# 10  choi_unpub          1                1_equidistant             no
# 11  choi_unpub          1                 1_neartarget             no
# 12  choi_unpub          1                  1_fartarget             no
# 13    choi2018          1                  1_twoobject             no
# 14    choi2018          1                  1_oneobject             no
# 15    choi2018          1                     1_hidden             no
# 16   woo_unpub          1     1_statechange_objectswap             no
# 17   woo_unpub          2   2_disambiguatingobjectgoal             no
# 18   woo_unpub          3 3_disambiguatinglocationgoal             no
#    object_diff_size_huge action_consequence location_object_goal_ambiguous
# 1                     no               none                            yes
# 2                     no               none                            yes
# 3                     no               none                             no
# 4                     no               none                            yes
# 5                     no               none                            yes
# 6                     no               none                            yes
# 7                     no               none                            yes
# 8                     no               none                            yes
# 9                     no               none                            yes
# 10                   yes               none                            yes
# 11                   yes               none                            yes
# 12                   yes               none                            yes
# 13                   yes               none                            yes
# 14                   yes               none                            yes
# 15                   yes               none                            yes
# 16                    no       state_change                            yes
# 17                    no       state_change                             no
# 18                    no       state_change                             no
#     agent bothobjects_present_visible_fam
# 1  object                             yes
# 2  object                              no
# 3  object                              no
# 4    hand                             yes
# 5    hand                             yes
# 6    hand                             yes
# 7    hand                             yes
# 8    hand                             yes
# 9    hand                             yes
# 10 person                             yes
# 11 person                             yes
# 12 person                              no
# 13 person                             yes
# 14 person                             yes
# 15 person                             yes
# 16 person                             yes
# 17 person                             yes
# 18 person                             yes
# originally pre-registered model was rank deficient, needed to drop 2 variables
# dropped action_causal because it is redundant with action_consequence (i.e. there are no non-causal actions that are state changes)
# dropped object_diff_size_huge also because it is almost completely redundant with agent (i.e. all studies with a full human agent also are "yes" for "object_diff_size_huge")

# aa <- allFit(goals.m1)

#TODO figure out which levels of agent are in here
summary(goals.m1)
# Linear mixed model fit by REML. t-tests use Satterthwaite's method [
# lmerModLmerTest]
# Formula: 
# look_pref ~ training_yesno + action_consequence + location_object_goal_ambiguous +  
#     agent + bothobjects_present_visible_fam + ageday + (1 | condition) +  
#     (1 | experiment) + (1 | paper)
#    Data: goals
# Control: lmerControl(optimizer = "Nelder_Mead")
# 
# REML criterion at convergence: 3022
# 
# Scaled residuals: 
#    Min     1Q Median     3Q    Max 
# -4.137 -0.456 -0.040  0.412  2.956 
# 
# Random effects:
#  Groups     Name        Variance      Std.Dev. 
#  condition  (Intercept)  16.053298091  4.006657
#  paper      (Intercept)   0.000000112  0.000335
#  experiment (Intercept)   0.000000778  0.000882
#  Residual               149.271235370 12.217661
# Number of obs: 386, groups:  condition, 14; paper, 6; experiment, 3
# 
# Fixed effects:
#                                  Estimate Std. Error       df t value Pr(>|t|)
# (Intercept)                        9.5238     5.8857 143.0053    1.62    0.108
# training_yesno1                    1.9406     2.2456   5.3129    0.86    0.425
# action_consequence1                2.9489     2.2291   9.9009    1.32    0.216
# location_object_goal_ambiguous1    4.7807     2.1749   9.5526    2.20    0.054
# agent1                             6.4554     2.6278  21.1836    2.46    0.023
# agent2                            -5.4021     2.7773   7.0042   -1.95    0.093
# bothobjects_present_visible_fam1   5.0575     1.9463  15.5349    2.60    0.020
# ageday                            -0.0677     0.0507 376.2822   -1.34    0.182
#                                   
# (Intercept)                       
# training_yesno1                   
# action_consequence1               
# location_object_goal_ambiguous1  .
# agent1                           *
# agent2                           .
# bothobjects_present_visible_fam1 *
# ageday                            
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Correlation of Fixed Effects:
#             (Intr) trnn_1 actn_1 lc___1 agent1 agent2 bt___1
# tranng_ysn1  0.292                                          
# actn_cnsqn1 -0.216 -0.003                                   
# lctn_bjc__1 -0.001 -0.001  0.630                            
# agent1      -0.001  0.287 -0.005 -0.167                     
# agent2       0.009 -0.543 -0.161  0.105 -0.791              
# bthbjct___1 -0.131  0.003  0.270  0.160  0.563 -0.397       
# ageday      -0.898 -0.045  0.061  0.031  0.019 -0.045 -0.059
# convergence code: 0
# boundary (singular) fit: see ?isSingular
sjPlot::plot_model(goals.m1,
                   type = "est",
                   colors = "bw",
                   sort.est=TRUE,
                   axis.title=c("Effect (Unexpected - Expected in seconds)"),
                   show.values=TRUE,
                   show.p=TRUE)

sjPlot:: tab_model(goals.m1, show.stat=TRUE)
  look pref
Predictors Estimates CI Statistic p
(Intercept) 9.52 -2.01 – 21.06 1.62 0.106
training_yesno1 1.94 -2.46 – 6.34 0.86 0.387
action_consequence1 2.95 -1.42 – 7.32 1.32 0.186
location_object_goal_ambiguous1 4.78 0.52 – 9.04 2.20 0.028
agent1 6.46 1.30 – 11.61 2.46 0.014
agent2 -5.40 -10.85 – 0.04 -1.95 0.052
bothobjects_present_visible_fam1 5.06 1.24 – 8.87 2.60 0.009
ageday -0.07 -0.17 – 0.03 -1.34 0.182
Random Effects
σ2 149.27
τ00 condition 16.05
τ00 paper 0.00
τ00 experiment 0.00
ICC 0.10
N condition 14
N experiment 3
N paper 6
Observations 386
Marginal R2 / Conditional R2 0.092 / 0.181
plot(allEffects(goals.m1))

goals.interventions<- cbind(
  gen.beta(effectsize::standardize(goals.m1)),
  gen.m(goals.m1),
  gen.ci(goals.m1)[4:11,]
) 
# boundary (singular) fit: see ?isSingular
# Computing profile confidence intervals ...
# Warning in nextpar(mat, cc, i, delta, lowcut, upcut): Last two rows have
# identical or NA .zeta values: using minstep
# Warning in FUN(X[[i]], ...): non-monotonic profile for .sig03
# Warning in confint.thpr(pp, level = level, zeta = zeta): bad spline fit
# for .sig03: falling back to linear interpolation
# Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
# collapsing to unique 'x' values
cooks.goals <- cooks.distance(goals.m1, group = "subj")
# Warning in cooks.distance.lmerMod(goals.m1, group = "subj"): group is not a
# valid argument for this function. As of version 0.4.0, group has been replaced
# by level.
dotplot_diag(x = cooks.goals, cutoff = "internal", name = "cooks.distance",  index=goals$subj) + ylab("Cook's distance") + xlab("subjID")

goals.cooks <- goals[which(cooks.goals <= 4/386),]


# where are the influential observations?
nrow(goals)
# [1] 386
nrow(goals.cooks)
# [1] 367
goals.summary <- goals %>%
  group_by(paper,condition) %>%
  summarise(n = n())
# `summarise()` regrouping output by 'paper' (override with `.groups` argument)
goals.cooksn <- goals.cooks %>%
  group_by(paper,condition) %>%
  summarise(n_cooks = n()) 
# `summarise()` regrouping output by 'paper' (override with `.groups` argument)
goals.where.influential <- full_join(goals.summary, goals.cooksn) %>%
  mutate(n_excluded  = n - n_cooks)
# Joining, by = c("paper", "condition")
knitr::kable(goals.where.influential)
paper condition n n_cooks n_excluded
choi_unpub 1_equidistant 24 24 0
choi_unpub 1_fartarget 24 23 1
choi_unpub 1_neartarget 24 23 1
choi2018 1_hidden 16 16 0
choi2018 1_oneobject 16 15 1
choi2018 1_twoobject 16 16 0
gerson2014a 1_active 24 24 0
gerson2014a 1_control 24 24 0
gerson2014a 1_observation 24 24 0
gerson2014b 1_active 30 30 0
gerson2014b 1_observation 30 30 0
gerson2014b 2_generalization 30 30 0
luo2011 1_oneobject 12 7 5
luo2011 1_twoobject 12 6 6
luo2011 2_differentpositions 12 7 5
woo_unpub 1_statechange_objectswap 20 20 0
woo_unpub 2_disambiguatingobjectgoal 24 24 0
woo_unpub 3_disambiguatinglocationgoal 24 24 0
goals.m1.cooks <- lmer(data = goals.cooks,
     formula = look_pref ~ training_yesno  + action_consequence + location_object_goal_ambiguous + agent + bothobjects_present_visible_fam + ageday + (1|condition) + (1|experiment) + (1|paper),
     control = lmerControl(optimizer="Nelder_Mead"))
# boundary (singular) fit: see ?isSingular
fixef(goals.m1.cooks)
#                      (Intercept)                  training_yesno1 
#                          6.81477                          1.86165 
#              action_consequence1  location_object_goal_ambiguous1 
#                          2.49725                          4.86346 
#                           agent1                           agent2 
#                          4.61178                         -3.97153 
# bothobjects_present_visible_fam1                           ageday 
#                          2.56006                         -0.02759
fixef(goals.m1.cooks,add.dropped=TRUE)
#                      (Intercept)                  training_yesno1 
#                          6.81477                          1.86165 
#              action_consequence1  location_object_goal_ambiguous1 
#                          2.49725                          4.86346 
#                           agent1                           agent2 
#                          4.61178                         -3.97153 
# bothobjects_present_visible_fam1                           ageday 
#                          2.56006                         -0.02759
summary(goals.m1.cooks)
# Linear mixed model fit by REML. t-tests use Satterthwaite's method [
# lmerModLmerTest]
# Formula: 
# look_pref ~ training_yesno + action_consequence + location_object_goal_ambiguous +  
#     agent + bothobjects_present_visible_fam + ageday + (1 | condition) +  
#     (1 | experiment) + (1 | paper)
#    Data: goals.cooks
# Control: lmerControl(optimizer = "Nelder_Mead")
# 
# REML criterion at convergence: 2744
# 
# Scaled residuals: 
#    Min     1Q Median     3Q    Max 
# -3.596 -0.483 -0.053  0.466  3.468 
# 
# Random effects:
#  Groups     Name        Variance Std.Dev.
#  condition  (Intercept)   9.63    3.1    
#  paper      (Intercept)   0.00    0.0    
#  experiment (Intercept)   0.00    0.0    
#  Residual               105.05   10.2    
# Number of obs: 367, groups:  condition, 14; paper, 6; experiment, 3
# 
# Fixed effects:
#                                  Estimate Std. Error       df t value Pr(>|t|)
# (Intercept)                        6.8148     5.0804 160.0703    1.34    0.182
# training_yesno1                    1.8617     1.7692   4.7191    1.05    0.344
# action_consequence1                2.4973     1.8530   9.4820    1.35    0.209
# location_object_goal_ambiguous1    4.8635     1.8569   9.8464    2.62    0.026
# agent1                             4.6118     2.5171  24.1382    1.83    0.079
# agent2                            -3.9715     2.3218   7.0646   -1.71    0.131
# bothobjects_present_visible_fam1   2.5601     1.7217  11.4617    1.49    0.164
# ageday                            -0.0276     0.0441 358.0149   -0.63    0.532
#                                   
# (Intercept)                       
# training_yesno1                   
# action_consequence1               
# location_object_goal_ambiguous1  *
# agent1                           .
# agent2                            
# bothobjects_present_visible_fam1  
# ageday                            
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Correlation of Fixed Effects:
#             (Intr) trnn_1 actn_1 lc___1 agent1 agent2 bt___1
# tranng_ysn1  0.274                                          
# actn_cnsqn1 -0.198 -0.003                                   
# lctn_bjc__1 -0.003 -0.001  0.664                            
# agent1       0.045  0.238 -0.047 -0.206                     
# agent2      -0.011 -0.513 -0.135  0.126 -0.797              
# bthbjct___1 -0.118  0.004  0.258  0.126  0.554 -0.437       
# ageday      -0.906 -0.049  0.053  0.028 -0.010 -0.025 -0.074
# convergence code: 0
# boundary (singular) fit: see ?isSingular
plot(allEffects(goals.m1.cooks))

goals.interventions.cooks<- cbind(
  gen.beta(effectsize::standardize(goals.m1.cooks)),
  gen.m(goals.m1.cooks),
  gen.ci(goals.m1.cooks)[4:11,]
) 
# boundary (singular) fit: see ?isSingular
# Computing profile confidence intervals ...

We note that the analysis we reported deviates from our pre-registration plan. In that plan, we included 8 fixed effects, including a fixed effect for age in days, one picking out a control condition, and 6 others. However, 2 of these fixed effects were either completely or partially redundant with other predictors in the model, which caused a rank deficiency issue. Thus, we dropped these two fixed effects from the model. We report this new model below.

When we compared the effects of different variants, we found that making the goal of the agent’s actions unambiguous (marginal effect, [-0.967,6.147], ß=0.37, B=4.781, SE=2.175, p=0.054, two-tailed), or presenting a self-propelled object (marginal effect, [0.993,7.911], ß=0.5, B=6.455, SE=2.628, p=0.023, two-tailed) or an entire person ([1.982,10.67], ß=-0.418, B=-5.402, SE=2.777, p=0.093, two-tailed), relative to seeing a hand appear from off-stage, increased infants’ looking preference for the unexpected event. We did not find any other effects, including age, motor training, or action consequences, when taking into account all of these predictors in the same model. We did not find an effect of age, [1.209,7.725], ß=-0.068, B=-0.068, SE=0.051, p=0.182, two-tailed. These analyses included 386 total infants, 19 of whom were classified as influential based on Cook’s Distance. Excluding these influential participants in the analysis produced similar results, except that the goal ambiguity effect crossed the threshold into significance ([-0.754,4.979], ß=0.451, B=4.863, SE=1.857, p=0.026, two-tailed), and the two effects of the agent type crossed the threshold out of significance (marginal effect of object, [1.421,7.294], ß=0.428, B=4.612, SE=2.517, p=0.079, two-tailed; non-significant effect of entire person, [0.994,9.402], ß=-0.368, B=-3.972, SE=2.322, p=0.131, two-tailed).